Digits classification
Load the data.
mnist <- read_mnist()
glimpse(mnist)
List of 2
$ train:List of 2
..$ images: int [1:60000, 1:784] 0 0 0 0 0 0 0 0 0 0 ...
..$ labels: int [1:60000] 5 0 4 1 9 2 1 3 1 4 ...
$ test :List of 2
..$ images: int [1:10000, 1:784] 0 0 0 0 0 0 0 0 0 0 ...
..$ labels: int [1:10000] 7 2 1 0 4 1 4 9 5 9 ...
Save the features into a different variable.
features <- mnist$train$images
glimpse(features)
int [1:60000, 1:784] 0 0 0 0 0 0 0 0 0 0 ...
Converting our data to matrix.
d <- matrix(features[1,], nrow = 28, byrow = TRUE)
digito <- melt(d)
digito
We plot the resulting image.
digito %>%
ggplot(aes(x = Var2, y = Var1)) +
geom_raster(aes(fill = factor(value))) +
scale_y_reverse() + scale_fill_grey() +
theme(legend.position = "none")

p <- function(i){
d <- matrix(features[i,], nrow = 28, byrow = TRUE)
digito<-melt(d)
ggplot(digito, aes(x = Var2, y = Var1)) +
geom_raster(aes(fill=value)) + coord_fixed() +
theme_void() + theme(legend.position = "none") +
scale_y_reverse()
}
p(1) + p(2) + p(3) + p(4) + p(5) + p(6)

We get the total occurences for each digit.
etiquetas <- mnist$train$labels
etiquetas %>%
as_tibble() %>%
mutate(cant = 1) %>%
group_by(value) %>%
summarise(total = sum(cant))
`summarise()` ungrouping output (override with `.groups` argument)
features2 <- features %>%
as_tibble() %>%
mutate(digito = etiquetas,
n = 1:60000) %>%
select(n, digito, everything())
features2
features_long <- features2 %>%
pivot_longer(cols = -c(n, digito)) %>%
mutate(Var1 = rep(1:28, times = 28 * 60000),
Var2 = rep(1:28, each = 28, times = 60000)) %>%
select(-name)
features_long
We create a function that selects a subset of the data and plots the result.
For example, taking the first 12 rows.
p <- function(i){
features_long %>%
filter(n %in% i) %>%
ggplot(aes(x = Var1, y = Var2)) +
geom_raster(aes(fill = factor(value))) +
scale_y_reverse() + scale_fill_grey() +
theme(legend.position = "none") +
facet_wrap(~ n)
}
p(1:12)

tibble(Var1 = 3,
Var2 = 6,
lab = features_long %>% group_by(n) %>% distinct(digito) %>% pull(digito)) %>%
slice(2:4)
Getting now the second 12 rows:

mnist_clustering <- kmeans(features, centers = 10, nstart = 10)
did not converge in 10 iterationsQuick-TRANSfer stage steps exceeded maximum (= 3000000)Quick-TRANSfer stage steps exceeded maximum (= 3000000)Quick-TRANSfer stage steps exceeded maximum (= 3000000)
num [1:10, 1:784] 0 0 0 0 0 0 0 0 0 0 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:10] "1" "2" "3" "4" ...
..$ : NULL
LS0tDQp0aXRsZTogIkJhc2ljIGNsYXNzaWZpY2F0aW9uIGV4YW1wbGVzIg0KYXV0aG9yOiAiUGFibG8gQmVuYXZpZGVzLUhlcnJlcmEiDQpkYXRlOiAyMDIwLTA2LTI3DQpvdXRwdXQ6IA0KICBodG1sX25vdGVib29rOg0KICAgIHRvYzogVFJVRQ0KICAgIHRvY19mbG9hdDogVFJVRQ0KICAgIHRoZW1lOiBzcGFjZWxhYg0KICAgIGhpZ2hsaWdodDogdGFuZ28NCi0tLQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChtZXNzYWdlID0gVFJVRSwgd2FybmluZyA9IFRSVUUpDQpgYGANCg0KIyBQcmVycmVxdWlzaXRlcw0KDQpgYGB7ciBwa2dzLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGRzbGFicykNCmxpYnJhcnkocmVzaGFwZTIpDQpsaWJyYXJ5KHBhdGNod29yaykNCmxpYnJhcnkodGlkeXF1YW50KQ0KYGBgDQoNCiMgRGlnaXRzIGNsYXNzaWZpY2F0aW9uDQoNCkxvYWQgdGhlIGRhdGEuDQoNCmBgYHtyIGRpZ2l0c30NCm1uaXN0IDwtIHJlYWRfbW5pc3QoKQ0KZ2xpbXBzZShtbmlzdCkNCmBgYA0KDQpTYXZlIHRoZSBmZWF0dXJlcyBpbnRvIGEgZGlmZmVyZW50IHZhcmlhYmxlLg0KDQpgYGB7cn0NCmZlYXR1cmVzIDwtIG1uaXN0JHRyYWluJGltYWdlcw0KZ2xpbXBzZShmZWF0dXJlcykNCmBgYA0KDQpDb252ZXJ0aW5nIG91ciBkYXRhIHRvIG1hdHJpeC4NCg0KYGBge3IgZGlnaXQgbWF0cml4fQ0KZCA8LSBtYXRyaXgoZmVhdHVyZXNbMSxdLCBucm93ID0gMjgsIGJ5cm93ID0gVFJVRSkNCmRpZ2l0byA8LSBtZWx0KGQpDQpkaWdpdG8NCmBgYA0KDQpXZSBwbG90IHRoZSByZXN1bHRpbmcgaW1hZ2UuDQoNCmBgYHtyIGRpZ2l0IHBsb3R9DQpkaWdpdG8gJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSBWYXIyLCB5ID0gVmFyMSkpICsNCiAgZ2VvbV9yYXN0ZXIoYWVzKGZpbGwgPSBmYWN0b3IodmFsdWUpKSkgKw0KICBzY2FsZV95X3JldmVyc2UoKSArIHNjYWxlX2ZpbGxfZ3JleSgpICsNCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKQ0KYGBgDQoNCmBgYHtyIGRpZ2l0IHBsb3QgZnVuY3Rpb259DQpwIDwtIGZ1bmN0aW9uKGkpew0KICAgIGQgPC0gbWF0cml4KGZlYXR1cmVzW2ksXSwgbnJvdyA9IDI4LCBieXJvdyA9IFRSVUUpDQogICAgZGlnaXRvPC1tZWx0KGQpDQogICAgZ2dwbG90KGRpZ2l0bywgYWVzKHggPSBWYXIyLCB5ID0gVmFyMSkpICsNCiAgICBnZW9tX3Jhc3RlcihhZXMoZmlsbD12YWx1ZSkpICsgIGNvb3JkX2ZpeGVkKCkgKw0KICAgICAgdGhlbWVfdm9pZCgpICsgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKSArDQogICAgICBzY2FsZV95X3JldmVyc2UoKQ0KICB9DQpgYGANCg0KYGBge3IgZGlnaXQgcGxvdCBleGFtcGxlfQ0KcCgxKSArIHAoMikgKyBwKDMpICsgcCg0KSArIHAoNSkgKyBwKDYpDQpgYGANCg0KV2UgZ2V0IHRoZSB0b3RhbCBvY2N1cmVuY2VzIGZvciBlYWNoIGRpZ2l0Lg0KDQpgYGB7ciBkaWdpdCBsYWJlbCBzdW1tYXJ5fQ0KZXRpcXVldGFzIDwtIG1uaXN0JHRyYWluJGxhYmVscyANCmV0aXF1ZXRhcyAlPiUNCiAgYXNfdGliYmxlKCkgJT4lIA0KICBtdXRhdGUoY2FudCA9IDEpICU+JQ0KICBncm91cF9ieSh2YWx1ZSkgJT4lDQogICAgc3VtbWFyaXNlKHRvdGFsID0gc3VtKGNhbnQpKQ0KYGBgDQoNCmBgYHtyfQ0KZCAlPiUgDQogIGFzX3RpYmJsZSgpDQpgYGANCg0KDQpgYGB7cn0NCmZlYXR1cmVzMiA8LSBmZWF0dXJlcyAlPiUgDQogIGFzX3RpYmJsZSgpICU+JSANCiAgbXV0YXRlKGRpZ2l0byA9IGV0aXF1ZXRhcywNCiAgICAgICAgIG4gPSAxOjYwMDAwKSAlPiUgDQogIHNlbGVjdChuLCBkaWdpdG8sIGV2ZXJ5dGhpbmcoKSkNCmZlYXR1cmVzMg0KYGBgDQoNCg0KDQpgYGB7cn0NCmZlYXR1cmVzX2xvbmcgPC0gZmVhdHVyZXMyICU+JSANCiAgcGl2b3RfbG9uZ2VyKGNvbHMgPSAtYyhuLCBkaWdpdG8pKSAlPiUgDQogIG11dGF0ZShWYXIxID0gcmVwKDE6MjgsIHRpbWVzID0gMjggKiA2MDAwMCksDQogICAgICAgICBWYXIyID0gcmVwKDE6MjgsIGVhY2ggPSAyOCwgdGltZXMgPSA2MDAwMCkpICU+JQ0KICBzZWxlY3QoLW5hbWUpDQpmZWF0dXJlc19sb25nDQpgYGANCg0KV2UgY3JlYXRlIGEgZnVuY3Rpb24gdGhhdCBzZWxlY3RzIGEgc3Vic2V0IG9mIHRoZSBkYXRhIGFuZCBwbG90cyB0aGUgcmVzdWx0Lg0KDQpGb3IgZXhhbXBsZSwgdGFraW5nIHRoZSBmaXJzdCAxMiByb3dzLg0KDQpgYGB7cn0NCnAgPC0gZnVuY3Rpb24oaSl7DQogIGZlYXR1cmVzX2xvbmcgJT4lIA0KICAgIGZpbHRlcihuICVpbiUgaSkgJT4lDQogICAgZ2dwbG90KGFlcyh4ID0gVmFyMSwgeSA9IFZhcjIpKSArDQogICAgZ2VvbV9yYXN0ZXIoYWVzKGZpbGwgPSBmYWN0b3IodmFsdWUpKSkgKw0KICAgIHNjYWxlX3lfcmV2ZXJzZSgpICsgc2NhbGVfZmlsbF9ncmV5KCkgKw0KICAgIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikgKyANCiAgICBmYWNldF93cmFwKH4gbikNCn0NCg0KcCgxOjEyKQ0KYGBgDQoNCmBgYHtyfQ0KdGliYmxlKFZhcjEgPSAzLA0KICAgICAgICAgICAgICAgICAgVmFyMiA9IDYsDQogICAgICAgICAgICAgICAgICBsYWIgPSBmZWF0dXJlc19sb25nICU+JSBncm91cF9ieShuKSAlPiUgZGlzdGluY3QoZGlnaXRvKSAlPiUgcHVsbChkaWdpdG8pKSAlPiUgDQogIHNsaWNlKDI6NCkNCmBgYA0KDQoNCkdldHRpbmcgbm93IHRoZSBzZWNvbmQgMTIgcm93czoNCg0KYGBge3J9DQpwKDEzOjI0KQ0KYGBgDQoNCg0KDQoNCg0KDQpgYGB7cn0NCm1uaXN0X2NsdXN0ZXJpbmcgPC0ga21lYW5zKGZlYXR1cmVzLCBjZW50ZXJzID0gMTAsIG5zdGFydCA9IDEwKQ0KYGBgDQoNCmBgYHtyfQ0KbW5pc3RfY2VudGVycyA8LSBtbmlzdF9jbHVzdGVyaW5nJGNlbnRlcnMNCmdsaW1wc2UobW5pc3RfY2VudGVycykNCmBgYA0KDQoNCg0KDQo=